

* Folder layout used by this script. All locations are relative to Stata's
* current working directory unless the root below is made absolute.
* To relocate the data, edit ONLY the root global (path); the three
* sub-folder globals are built from it, so they follow automatically.

global path "data"
global path_raw "${path}/raw"                          // raw input files
global path_analysis "${path}/analysis_data"           // intermediate .dta files
global path_output "${path}/output_tables_figures"     // output tables and figures

/*----------------------------------------------------------------------------*/

// Zillow Listing Data //

* The data is proprietary.
* I provide pseudo data in the replication package. 

* Build a ctn-by-year-by-month panel from the listing-level Zillow file:
*   dom_sold = group mean of duration_sold
*   ptl_sold = group mean of price_ratio over listings with dummy_sold==1
* Result: one row per (ctn, year, month), saved to the analysis folder.

clear
* Column 14 is forced to string on import (presumably the ctn identifier - confirm).
* The proprietary file is read here; the replication package ships
* "zillow_pseudo.csv" in the raw-data folder as a stand-in.
import delimited "$path_raw/zillow.csv", stringcols(14)

drop zip
drop if missing(ctn)   // listings without a ctn cannot be assigned to a group

sort ctn year month
order ctn year month

* Average days on market per group.
* NOTE(review): no dummy_sold filter is applied here, so this equals the
* "sold houses only" average only if duration_sold is missing for unsold
* listings - confirm against the data.
egen dom_sold = mean(duration_sold), by(ctn year month)
summarize duration_sold, detail   // diagnostic output only

* Trim price_ratio outside its 1st-99th percentile range (unreasonable price data).
* Both cutoffs come from the same summarize call, so they are applied in a
* single replace.
summarize price_ratio, detail
replace price_ratio = . if price_ratio > r(p99) | price_ratio < r(p1)

* Average price-to-list ratio per group, using sold listings only.
* x holds price_ratio for sold listings and is missing otherwise, so every
* row of the group receives the sold-only mean.
gen x = price_ratio if dummy_sold == 1
egen ptl_sold = mean(x), by(ctn year month)
drop x

* Both aggregates are constant within a group: keep one row per group.
keep ctn year month dom_sold ptl_sold
bysort ctn year month: keep if _n == 1

save "$path_analysis/zillow_ctn_year_month.dta", replace
